{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# EXTRA COVARIATES and TASKS CREATION"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/XXXX/miniconda3/envs/pytorch/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import datetime\n",
    "from datetime import timedelta\n",
    "file_path=\"~/Documents/Data/Full_MIMIC/\"\n",
    "outfile_path=\"~/Documents/Data/Full_MIMIC/Clean_data/\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### EXTRA TASKS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "unique_id_dict=pd.read_csv(outfile_path+\"UNIQUE_ID_dict.csv\")\n",
    "hadm_ids=unique_id_dict[\"HADM_ID\"]\n",
    "admissions=pd.read_csv(file_path+\"Admissions_processed.csv\")\n",
    "adm=admissions.loc[admissions[\"HADM_ID\"].isin(hadm_ids)].copy() #Select the patients selected earlier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "#We create a new vector for the duration stay.\n",
    "adm[\"admit\"]=pd.to_datetime(adm[\"ADMITTIME\"])\n",
    "adm[\"disc\"]=pd.to_datetime(adm[\"DISCHTIME\"])\n",
    "adm[\"duration\"]=adm[\"disc\"]-adm[\"admit\"]\n",
    "adm[\"Value\"]=adm[\"duration\"].dt.days"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "d=unique_id_dict[[\"HADM_ID\",\"UNIQUE_ID\"]].set_index(\"HADM_ID\").to_dict()[\"UNIQUE_ID\"]\n",
    "adm[\"UNIQUE_ID\"]=adm[\"HADM_ID\"].map(d)\n",
    "\n",
    "durations_df=adm[[\"UNIQUE_ID\",\"Value\"]].copy()\n",
    "durations_df.sort_values(by=\"UNIQUE_ID\",inplace=True)\n",
    "durations_df.to_csv(outfile_path+\"complete_durations_vec.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>UNIQUE_ID</th>\n",
       "      <th>duration_days</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>12567</th>\n",
       "      <td>0</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16312</th>\n",
       "      <td>1</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16974</th>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11339</th>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23224</th>\n",
       "      <td>5</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6257</th>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5714</th>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18023</th>\n",
       "      <td>8</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22326</th>\n",
       "      <td>9</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>977</th>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4234</th>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21596</th>\n",
       "      <td>12</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15734</th>\n",
       "      <td>13</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17426</th>\n",
       "      <td>14</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22325</th>\n",
       "      <td>15</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6791</th>\n",
       "      <td>16</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9101</th>\n",
       "      <td>17</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20229</th>\n",
       "      <td>18</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>600</th>\n",
       "      <td>19</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16744</th>\n",
       "      <td>20</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4738</th>\n",
       "      <td>21</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16989</th>\n",
       "      <td>22</td>\n",
       "      <td>18</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11217</th>\n",
       "      <td>23</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>810</th>\n",
       "      <td>24</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20317</th>\n",
       "      <td>25</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16179</th>\n",
       "      <td>26</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19957</th>\n",
       "      <td>27</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12444</th>\n",
       "      <td>28</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16776</th>\n",
       "      <td>29</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19761</th>\n",
       "      <td>21220</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>815</th>\n",
       "      <td>21221</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18694</th>\n",
       "      <td>21222</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2087</th>\n",
       "      <td>21223</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7237</th>\n",
       "      <td>21224</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5698</th>\n",
       "      <td>21225</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2995</th>\n",
       "      <td>21226</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23058</th>\n",
       "      <td>21227</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7913</th>\n",
       "      <td>21228</td>\n",
       "      <td>19</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5014</th>\n",
       "      <td>21229</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6532</th>\n",
       "      <td>21230</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5770</th>\n",
       "      <td>21231</td>\n",
       "      <td>10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7188</th>\n",
       "      <td>21232</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3398</th>\n",
       "      <td>21233</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2469</th>\n",
       "      <td>21234</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3908</th>\n",
       "      <td>21235</td>\n",
       "      <td>14</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18263</th>\n",
       "      <td>21236</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7477</th>\n",
       "      <td>21237</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8928</th>\n",
       "      <td>21238</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15787</th>\n",
       "      <td>21239</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22750</th>\n",
       "      <td>21240</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15905</th>\n",
       "      <td>21241</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20926</th>\n",
       "      <td>21242</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19432</th>\n",
       "      <td>21243</td>\n",
       "      <td>26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19314</th>\n",
       "      <td>21244</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1641</th>\n",
       "      <td>21245</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23013</th>\n",
       "      <td>21246</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8111</th>\n",
       "      <td>21247</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3025</th>\n",
       "      <td>21248</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1117</th>\n",
       "      <td>21249</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>21250 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       UNIQUE_ID  duration_days\n",
       "12567          0             14\n",
       "16312          1             12\n",
       "16974          2              3\n",
       "75             3              5\n",
       "11339          4              5\n",
       "23224          5              9\n",
       "6257           6              3\n",
       "5714           7             10\n",
       "18023          8             13\n",
       "22326          9              3\n",
       "977           10              4\n",
       "4234          11              3\n",
       "21596         12              3\n",
       "15734         13             14\n",
       "17426         14              4\n",
       "22325         15              5\n",
       "6791          16              5\n",
       "9101          17              7\n",
       "20229         18              3\n",
       "600           19              4\n",
       "16744         20              4\n",
       "4738          21             11\n",
       "16989         22             18\n",
       "11217         23              6\n",
       "810           24              9\n",
       "20317         25              7\n",
       "16179         26              7\n",
       "19957         27              5\n",
       "12444         28              4\n",
       "16776         29              7\n",
       "...          ...            ...\n",
       "19761      21220              5\n",
       "815        21221              3\n",
       "18694      21222              7\n",
       "2087       21223              5\n",
       "7237       21224              4\n",
       "5698       21225              8\n",
       "2995       21226              7\n",
       "23058      21227              8\n",
       "7913       21228             19\n",
       "5014       21229              6\n",
       "6532       21230              9\n",
       "5770       21231             10\n",
       "7188       21232              6\n",
       "3398       21233              7\n",
       "2469       21234              3\n",
       "3908       21235             14\n",
       "18263      21236              3\n",
       "7477       21237              5\n",
       "8928       21238              9\n",
       "15787      21239              6\n",
       "22750      21240             16\n",
       "15905      21241              6\n",
       "20926      21242              7\n",
       "19432      21243             26\n",
       "19314      21244              7\n",
       "1641       21245             17\n",
       "23013      21246              4\n",
       "8111       21247              6\n",
       "3025       21248              7\n",
       "1117       21249              5\n",
       "\n",
       "[21250 rows x 2 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "durations_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extra Covariates\n",
    "\n",
    "We want to add the covariates that are used in the SAPSII criterion."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "infile_path=\"~/Documents/Data/Full_MIMIC/Clean_data/\"\n",
    "hadm_ids=pd.read_csv(infile_path+\"UNIQUE_ID_dict.csv\")[\"HADM_ID\"]\n",
    "d=pd.read_csv(infile_path+\"UNIQUE_ID_dict.csv\")[[\"UNIQUE_ID\",\"HADM_ID\"]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "admissions=pd.read_csv(file_path+\"Admissions_processed.csv\")\n",
    "adm=admissions.loc[admissions[\"HADM_ID\"].isin(hadm_ids)].copy() #Select the patients selected earlier.\n",
    "adm=adm.merge(d,on=\"HADM_ID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Age\n",
    "adm[\"birth_date\"]=pd.to_datetime(adm[\"DOBTIME\"])\n",
    "adm[\"admit_date\"]=pd.to_datetime(adm[\"ADMITTIME\"])\n",
    "adm[\"age\"]=(adm[\"admit_date\"]-adm[\"birth_date\"]).dt.days//365"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Admission type\n",
    "adm[\"EMERGENCY_ADMISSION\"]=1*(adm[\"ADMISSION_TYPE\"]==\"EMERGENCY\")\n",
    "adm[\"ELECTIVE_ADMISSION\"]=1*(adm[\"ADMISSION_TYPE\"]==\"ELECTIVE\")\n",
    "adm[\"URGENT_ADMISSION\"]=1*(adm[\"ADMISSION_TYPE\"]==\"URGENT\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "cov=pd.read_csv(infile_path+\"complete_covariates.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "cov=cov.merge(adm[[\"age\",\"EMERGENCY_ADMISSION\",\"ELECTIVE_ADMISSION\",\"URGENT_ADMISSION\",\"UNIQUE_ID\"]],on=\"UNIQUE_ID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "cov.to_csv(infile_path+\"complete_covariates_extended.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Anion Gap', 'Bicarbonate', 'Calcium, Total', 'Chloride',\n",
       "       'Creatinine', 'Glucose', 'Magnesium', 'Phosphate', 'Potassium',\n",
       "       'Sodium', 'Alkaline Phosphatase',\n",
       "       'Asparate Aminotransferase (AST)', 'Bilirubin, Total',\n",
       "       'Urea Nitrogen', 'Basophils', 'Eosinophils', 'Hematocrit',\n",
       "       'Hemoglobin', 'Lymphocytes', 'MCH', 'MCHC', 'MCV', 'Monocytes',\n",
       "       'Neutrophils', 'Platelet Count', 'RDW', 'Red Blood Cells',\n",
       "       'White Blood Cells', 'PTT', 'Base Excess', 'Calculated Total CO2',\n",
       "       'Lactate', 'pCO2', 'pH', 'pO2', 'PT',\n",
       "       'Alanine Aminotransferase (ALT)', 'Specific Gravity',\n",
       "       'Sodium Chloride 0.9%  FlushDrug Drug', 'D5WDrug Drug',\n",
       "       'Magnesium SulfateDrug Drug', 'Potassium ChlorideDrug Drug',\n",
       "       'Potassium Chloride', 'Calcium Gluconate', 'Magnesium Sulfate',\n",
       "       'Furosemide (Lasix)', 'Insulin - Regular', 'PO Intake',\n",
       "       'Insulin - Humalog', 'OR Crystalloid Intake', 'Morphine Sulfate',\n",
       "       'Insulin - Glargine', 'OR Cell Saver Intake', 'Sterile Water',\n",
       "       'Dextrose 5%', 'LR', 'Piggyback', 'Solution', 'KCL (Bolus)',\n",
       "       'Magnesium Sulfate (Bolus)', 'Nitroglycerin', 'Albumin', 'Foley',\n",
       "       'Chest Tube #1', 'Metoprolol TartrateDrug Drug',\n",
       "       'BisacodylDrug Drug', 'Docusate SodiumDrug Drug',\n",
       "       'AspirinDrug Drug', 'Packed Red Blood Cells', 'Phenylephrine',\n",
       "       'Gastric Meds', 'GT Flush', 'Hydralazine', 'Midazolam (Versed)',\n",
       "       'Metoprolol', 'D5 1/2NS', 'Void', 'TF Residual', 'OR EBL',\n",
       "       'Albumin 5%', 'Lorazepam (Ativan)', 'Jackson Pratt #1',\n",
       "       'Pre-Admission', 'PantoprazoleDrug Drug',\n",
       "       'Humulin-R InsulinDrug Drug', 'Stool Out Stool',\n",
       "       'Ultrafiltrate Ultrafiltrate', 'Chest Tube #2', 'Heparin Sodium',\n",
       "       'K Phos', 'Norepinephrine', 'Urine Out Incontinent',\n",
       "       'Ostomy (output)', 'Fecal Bag', 'Gastric Gastric Tube',\n",
       "       'Condom Cath'], dtype=object)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.read_csv(infile_path+\"label_dict.csv\")[\"LABEL\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m   1838\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1839\u001b[0;31m             \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1840\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader.read\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_low_memory\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._read_rows\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._convert_column_data\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._convert_tokens\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32mpandas/_libs/parsers.pyx\u001b[0m in \u001b[0;36mpandas._libs.parsers.TextReader._convert_with_dtype\u001b[0;34m()\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.6/site-packages/pandas/core/dtypes/common.py\u001b[0m in \u001b[0;36mis_integer_dtype\u001b[0;34m(arr_or_dtype)\u001b[0m\n\u001b[1;32m    776\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 777\u001b[0;31m \u001b[0;32mdef\u001b[0m \u001b[0mis_integer_dtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marr_or_dtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    778\u001b[0m     \"\"\"\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: ",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-25-e1588f6fcd49>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mchartevents\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfile_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\"CHARTEVENTS.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mparser_f\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)\u001b[0m\n\u001b[1;32m    707\u001b[0m                     skip_blank_lines=skip_blank_lines)\n\u001b[1;32m    708\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 709\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    711\u001b[0m     \u001b[0mparser_f\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__name__\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m    453\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    454\u001b[0m     \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 455\u001b[0;31m         \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    456\u001b[0m     \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    457\u001b[0m         \u001b[0mparser\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m   1067\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'skipfooter not supported for iteration'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1068\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1069\u001b[0;31m         \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1070\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1071\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moptions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'as_recarray'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/miniconda3/envs/pytorch/lib/python3.6/site-packages/pandas/io/parsers.py\u001b[0m in \u001b[0;36mread\u001b[0;34m(self, nrows)\u001b[0m\n\u001b[1;32m   1837\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnrows\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1838\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1839\u001b[0;31m             \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnrows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1840\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mStopIteration\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1841\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_first_chunk\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "chartevents=pd.read_csv(file_path+\"CHARTEVENTS.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "oj\n"
     ]
    }
   ],
   "source": [
    "if a:\n",
    "    print(\"oj\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/XXXX/miniconda3/envs/pytorch/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: numpy.dtype size changed, may indicate binary incompatibility. Expected 96, got 88\n",
      "  return f(*args, **kwds)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import KFold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {},
   "outputs": [],
   "source": [
    "k=KFold(n_splits=3,random_state=22)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 5  6  7  8  9 10 11 12 13 14]\n",
      "[0 1 2 3 4]\n",
      "[ 0  1  2  3  4 10 11 12 13 14]\n",
      "[5 6 7 8 9]\n",
      "[0 1 2 3 4 5 6 7 8 9]\n",
      "[10 11 12 13 14]\n"
     ]
    }
   ],
   "source": [
    "for train,val in k.split(np.arange(15)):\n",
    "    print(train)\n",
    "    print(val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 5  6  7  8  9 10 11 12 13 14]\n",
      "[0 1 2 3 4]\n",
      "[ 0  1  2  3  4 10 11 12 13 14]\n",
      "[5 6 7 8 9]\n",
      "[0 1 2 3 4 5 6 7 8 9]\n",
      "[10 11 12 13 14]\n"
     ]
    }
   ],
   "source": [
    "for train,val in k.split(np.arange(15)):\n",
    "    print(train)\n",
    "    print(val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 5  6  7  8  9 10 11 12 13 14]\n",
      "[0 1 2 3 4]\n",
      "[ 0  1  2  3  4 10 11 12 13 14]\n",
      "[5 6 7 8 9]\n",
      "[0 1 2 3 4 5 6 7 8 9]\n",
      "[10 11 12 13 14]\n"
     ]
    }
   ],
   "source": [
    "for train,val in k.split(np.arange(15)):\n",
    "    print(train)\n",
    "    print(val)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [],
   "source": [
    "a=np.array([3,2,1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1, 3)"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {},
   "outputs": [],
   "source": [
    "b=[4,5,3,2,1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "metadata": {},
   "outputs": [],
   "source": [
    "c=np.arange(15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],\n",
       "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])"
      ]
     },
     "execution_count": 153,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "c=np.ones((15,15))\n",
    "c[train,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
